/***********************************************************************/
/*                                                                     */
/*                           Objective Caml                            */
/*                                                                     */
/*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         */
/*                                                                     */
/*  Copyright 1996 Institut National de Recherche en Informatique et   */
/*  en Automatique.  All rights reserved.  This file is distributed    */
/*  under the terms of the Q Public License version 1.0.               */
/*                                                                     */
/***********************************************************************/

/* $Id$ */

/* Asm part of the runtime system, Alpha processor */

#undef BROKEN_POSTINCREMENT

#define ADDRGLOBAL(reg,symb) \
  add reg = @ltoff(symb), gp;; ld8 reg = [reg]
#define LOADGLOBAL(reg,symb) \
  add r3 = @ltoff(symb), gp;; ld8 r3 = [r3];; ld8 reg = [r3]
#define STOREGLOBAL(reg,symb) \
  add r3 = @ltoff(symb), gp;; ld8 r3 = [r3];; st8 [r3] = reg

#define ST8OFF(a,b,d) st8 [a] = b, d
#define LD8OFF(a,b,d) ld8 a = [b], d
#define STFDOFF(a,b,d) stfd [a] = b, d
#define LDFDOFF(a,b,d) ldfd a = [b], d
#define STFSPILLOFF(a,b,d) stf.spill [a] = b, d
#define LDFFILLOFF(a,b,d) ldf.fill a = [b], d

#define SAVE2(a,b) ST8OFF(r2, a, 16); ST8OFF(r3, b, 16)
#define SAVE4(a,b,c,d) SAVE2(a,b);; SAVE2(c,d)
#define SAVE8(a,b,c,d,e,f,g,h) SAVE4(a,b,c,d);; SAVE4(e,f,g,h)

#define LOAD2(a,b) LD8OFF(a, r2, 16); LD8OFF(b, r3, 16)
#define LOAD4(a,b,c,d) LOAD2(a,b);; LOAD2(c,d)
#define LOAD8(a,b,c,d,e,f,g,h) LOAD4(a,b,c,d);; LOAD4(e,f,g,h)

#define FSAVE2(a,b) STFDOFF(r2, a, 16); STFDOFF(r3, b, 16)
#define FSAVE4(a,b,c,d) FSAVE2(a,b);; FSAVE2(c,d)
#define FSAVE8(a,b,c,d,e,f,g,h) FSAVE4(a,b,c,d);; FSAVE4(e,f,g,h)

#define FLOAD2(a,b) LDFDOFF(a, r2, 16); LDFDOFF(b, r3, 16)
#define FLOAD4(a,b,c,d) FLOAD2(a,b);; FLOAD2(c,d)
#define FLOAD8(a,b,c,d,e,f,g,h) FLOAD4(a,b,c,d);; FLOAD4(e,f,g,h)

#define FSPILL2(a,b) STFSPILLOFF(r2, a, 32); STFSPILLOFF(r3, b, 32)
#define FSPILL4(a,b,c,d) FSPILL2(a,b);; FSPILL2(c,d)
#define FSPILL8(a,b,c,d,e,f,g,h) FSPILL4(a,b,c,d);; FSPILL4(e,f,g,h)

#define FFILL2(a,b) LDFFILLOFF(a, r2, 32); LDFFILLOFF(b, r3, 32)
#define FFILL4(a,b,c,d) FFILL2(a,b);; FFILL2(c,d)
#define FFILL8(a,b,c,d,e,f,g,h) FFILL4(a,b,c,d);; FFILL4(e,f,g,h)

/* Allocation */
	.text

        .global caml_allocN#
        .proc   caml_allocN#
        .align 16

/* caml_allocN: all code generator registers preserved,
   gp preserved, r2 = requested size */

caml_allocN:
        sub     r4 = r4, r2 ;;
        cmp.ltu p0, p6 = r4, r5
        (p6) br.ret.sptk b0 ;;
        /* Stash return address at sp (in stack scratch area) */
        mov     r3 = b0 ;;
        st8     [sp] = r3
        /* Call GC */
        br.call.sptk b0 = caml_call_gc# ;;
        /* Return to caller */
        ld8     r3 = [sp] ;;
        mov     b0 = r3 ;;
        br.ret.sptk b0

        .endp   caml_allocN#

/* caml_call_gc: all code generator registers preserved,
   gp preserved, r2 = requested size */

        .global caml_call_gc#
        .proc   caml_call_gc#
        .align 16
caml_call_gc:
        /* Allocate stack frame */
        add     sp = -(16 + 16 + 80*8 + 42*8), sp ;;

        /* Save requested size and GP on stack */
        add     r3 = 16, sp ;;
        ST8OFF(r3, r2, 8) ;;
        st8     [r3] = gp

        /* Record lowest stack address, return address, GC regs */
        mov     r2 = b0 ;;
        STOREGLOBAL(r2, caml_last_return_address#)
        add     r2 = (16 + 16 + 80*8 + 42*8), sp ;;
        STOREGLOBAL(r2, caml_bottom_of_stack#)
        add     r2 = (16 + 16), sp ;;
        STOREGLOBAL(r2, caml_gc_regs#)

        /* Save all integer regs used by the code generator in the context */
.L100:  add     r3 = 8, r2 ;;
        SAVE4(r8,r9,r10,r11) ;;
        SAVE8(r16,r17,r18,r19,r20,r21,r22,r23) ;;
        SAVE8(r24,r25,r26,r27,r28,r29,r30,r31) ;;
        SAVE8(r32,r33,r34,r35,r36,r37,r38,r39) ;;
        SAVE8(r40,r41,r42,r43,r44,r45,r46,r47) ;;
        SAVE8(r48,r49,r50,r51,r52,r53,r54,r55) ;;
        SAVE8(r56,r57,r58,r59,r60,r61,r62,r63) ;;
        SAVE8(r64,r65,r66,r67,r68,r69,r70,r71) ;;
        SAVE8(r72,r73,r74,r75,r76,r77,r78,r79) ;;
        SAVE8(r80,r81,r82,r83,r84,r85,r86,r87) ;;
        SAVE4(r88,r89,r90,r91) ;;

        /* Save all floating-point registers not preserved by C */
        FSAVE2(f6,f7) ;;
        FSAVE8(f8,f9,f10,f11,f12,f13,f14,f15) ;;
        FSAVE8(f32,f33,f34,f35,f36,f37,f38,f39) ;;
        FSAVE8(f40,f41,f42,f43,f44,f45,f46,f47) ;;
        FSAVE8(f48,f49,f50,f51,f52,f53,f54,f55) ;;
        FSAVE8(f56,f57,f58,f59,f60,f61,f62,f63) ;;

        /* Save current allocation pointer for debugging purposes */
        STOREGLOBAL(r4, caml_young_ptr#)

        /* Save trap pointer in case an exception is raised */
        STOREGLOBAL(r6, caml_exception_pointer#)

        /* Call the garbage collector */
        br.call.sptk    b0 = caml_garbage_collection# ;;

        /* Restore gp */
        add     r3 = 24, sp ;;
        ld8     gp = [r3]

        /* Restore all integer regs from GC context */
        add     r2 = (16 + 16), sp ;;
        add     r3 = 8, r2 ;;
        LOAD4(r8,r9,r10,r11) ;;
        LOAD8(r16,r17,r18,r19,r20,r21,r22,r23) ;;
        LOAD8(r24,r25,r26,r27,r28,r29,r30,r31) ;;
        LOAD8(r32,r33,r34,r35,r36,r37,r38,r39) ;;
        LOAD8(r40,r41,r42,r43,r44,r45,r46,r47) ;;
        LOAD8(r48,r49,r50,r51,r52,r53,r54,r55) ;;
        LOAD8(r56,r57,r58,r59,r60,r61,r62,r63) ;;
        LOAD8(r64,r65,r66,r67,r68,r69,r70,r71) ;;
        LOAD8(r72,r73,r74,r75,r76,r77,r78,r79) ;;
        LOAD8(r80,r81,r82,r83,r84,r85,r86,r87) ;;
        LOAD4(r88,r89,r90,r91) ;;

        /* Restore all floating-point registers not preserved by C */
        FLOAD2(f6,f7) ;;
        FLOAD8(f8,f9,f10,f11,f12,f13,f14,f15) ;;
        FLOAD8(f32,f33,f34,f35,f36,f37,f38,f39) ;;
        FLOAD8(f40,f41,f42,f43,f44,f45,f46,f47) ;;
        FLOAD8(f48,f49,f50,f51,f52,f53,f54,f55) ;;
        FLOAD8(f56,f57,f58,f59,f60,f61,f62,f63) ;;

        /* Reload new allocation pointer and allocation limit */
        LOADGLOBAL(r4, caml_young_ptr#)
        LOADGLOBAL(r5, caml_young_limit#)

        /* Allocate space for the block */
        add     r3 = 16, sp ;;
        ld8     r2 = [r3] ;;
        sub     r4 = r4, r2 ;;
        cmp.ltu p6, p0 = r4, r5         /* enough space? */
        (p6) br.cond.spnt .L100 ;;      /* no: call GC again */

        /* Reload return address and say that we are back into Caml code */
        ADDRGLOBAL(r3, caml_last_return_address#) ;;
        ld8     r2 = [r3]
        st8     [r3] = r0 ;;

        /* Return to caller */
        mov     b0 = r2
        add     sp = (16 + 16 + 80*8 + 42*8), sp ;;
        br.ret.sptk b0

        .endp   caml_call_gc#

/* Call a C function from Caml */
/* Function to call is in r2 */

        .global caml_c_call#
        .proc   caml_c_call#
        .align  16

caml_c_call:
        /* The Caml code that called us does not expect any
           code-generator registers to be preserved */

        /* Recover entry point from the function pointer in r2 */
        LD8OFF(r3, r2, 8) ;;
        mov     b6 = r3

        /* Preserve gp in r7 */
        mov     r7 = gp

        /* Record lowest stack address and return address */
	mov	r14 = b0
        STOREGLOBAL(sp, caml_bottom_of_stack#) ;;
        STOREGLOBAL(r14, caml_last_return_address#)

        /* Make the exception handler and alloc ptr available to the C code */
        STOREGLOBAL(r4, caml_young_ptr#)
        STOREGLOBAL(r6, caml_exception_pointer#)

        /* Recover gp from the function pointer in r2 */
        ld8     gp = [r2]

        /* Call the function */
        br.call.sptk    b0 = b6 ;;

        /* Restore gp */
        mov     gp = r7 ;;

        /* Reload alloc ptr and alloc limit */
        LOADGLOBAL(r4, caml_young_ptr#)
        LOADGLOBAL(r5, caml_young_limit#)

        /* Reload return address and say that we are back into Caml code */
        ADDRGLOBAL(r3, caml_last_return_address#) ;;
        ld8     r2 = [r3]
        st8     [r3] = r0 ;;

        /* Return to caller */
        mov     b0 = r2 ;;
        br.ret.sptk b0

        .endp   caml_c_call#

/* Start the Caml program */

        .global caml_start_program#
        .proc   caml_start_program#
        .align  16

caml_start_program:
        ADDRGLOBAL(r2, caml_program#) ;;
        mov     b6 = r2

        /* Code shared with caml_callback* */
.L103:        
        /* Allocate 64 "out" registers (for the Caml code) and no locals */
        alloc	r3 = ar.pfs, 0, 0, 64, 0
        add     sp = -(56 * 8), sp ;;

        /* Save all callee-save registers on stack */
        add     r2 = 16, sp ;;
	ST8OFF(r2, r3, 8)       /* 0 : ar.pfs */
        mov     r3 = b0 ;;
        ST8OFF(r2, r3, 8) ;;    /* 1 : return address */
        ST8OFF(r2, gp, 8)       /* 2 : gp */
        mov     r3 = pr ;;
        ST8OFF(r2, r3, 8)       /* 3 : predicates */
        mov     r3 = ar.fpsr ;;
        ST8OFF(r2, r3, 8)       /* 4 : ar.fpsr */
        mov     r3 = ar.unat ;;
        ST8OFF(r2, r3, 8)       /* 5 : ar.unat */
        mov     r3 = ar.lc ;;
        ST8OFF(r2, r3, 8)       /* 6 : ar.lc */
        mov     r3 = b1 ;;
        ST8OFF(r2, r3, 8)       /* 7 - 11 : b1 - b5 */
        mov     r3 = b2 ;;
        ST8OFF(r2, r3, 8)
        mov     r3 = b3 ;;
        ST8OFF(r2, r3, 8)
        mov     r3 = b4 ;;
        ST8OFF(r2, r3, 8)
        mov     r3 = b5 ;;
        ST8OFF(r2, r3, 8) ;;

        add     r3 = 8, r2 ;;
        SAVE4(r4,r5,r6,r7) ;;   /* 12 - 15 : r4 - r7 */

        add     r3 = 16, r2 ;;  /* 16 - 55 : f2 - f5, f16 - f31 */
        FSPILL4(f2,f3,f4,f5) ;;
        FSPILL8(f16,f17,f18,f19,f20,f21,f22,f23) ;;
        FSPILL8(f24,f25,f26,f27,f28,f29,f30,f31) ;;

        /* Set up a callback link on the stack.  In addition to
           the normal callback link contents (saved values of
           caml_bottom_of_stack, caml_last_return_address and
           caml_gc_regs), we also save there caml_saved_bsp
           and caml_saved_rnat */
        add     sp = -48, sp
        LOADGLOBAL(r3, caml_bottom_of_stack#)
        add     r2 = 16, sp ;;
        ST8OFF(r2, r3, 8)
        LOADGLOBAL(r3, caml_last_return_address#) ;;
        ST8OFF(r2, r3, 8)
        LOADGLOBAL(r3, caml_gc_regs#) ;;
        ST8OFF(r2, r3, 8)
        LOADGLOBAL(r3, caml_saved_bsp#) ;;
        ST8OFF(r2, r3, 8)
        LOADGLOBAL(r3, caml_saved_rnat#) ;;
        ST8OFF(r2, r3, 8)

        /* Set up a trap frame to catch exceptions escaping the Caml code */
        mov     r6 = sp
        add     sp = -16, sp ;;
        LOADGLOBAL(r3, caml_exception_pointer#)
        add     r2 = 16, sp ;;
        ST8OFF(r2, r3, 8)
.L110:	mov	r3 = ip ;;
	add	r3 = .L101 - .L110, r3 ;;
        ST8OFF(r2, r3, 8) ;;

        /* Save ar.bsp, flush register window, and save ar.rnat */
        mov     r2 = ar.bsp ;;
        STOREGLOBAL(r2, caml_saved_bsp#) ;;
        mov     r14 = ar.rsc ;;
        and     r2 = ~0x3, r14;;        /* set rsc.mode = 0 */
	mov	ar.rsc = r2 ;;		/* RSE is in enforced lazy mode */
        flushrs ;;                      /* must be first instr in group */
        mov     r2 = ar.rnat ;;
        STOREGLOBAL(r2, caml_saved_rnat#)
	mov	ar.rsc = r14		/* restore original RSE mode */

        /* Reload allocation pointers */
        LOADGLOBAL(r4, caml_young_ptr#)
        LOADGLOBAL(r5, caml_young_limit#)

        /* We are back into Caml code */
        STOREGLOBAL(r0, caml_last_return_address#)

        /* Call the Caml code */
        br.call.sptk b0 = b6 ;;
.L102:

        /* Pop the trap frame, restoring caml_exception_pointer */
        add     sp = 16, sp ;;
        ld8     r2 = [sp] ;;
        STOREGLOBAL(r2, caml_exception_pointer#)

.L104:
        /* Pop the callback link, restoring the global variables */
        add     r14 = 16, sp ;;
        LD8OFF(r2, r14, 8) ;;
        STOREGLOBAL(r2, caml_bottom_of_stack#)
        LD8OFF(r2, r14, 8) ;;
        STOREGLOBAL(r2, caml_last_return_address#)
        LD8OFF(r2, r14, 8) ;;
        STOREGLOBAL(r2, caml_gc_regs#)
        LD8OFF(r2, r14, 8) ;;
        STOREGLOBAL(r2, caml_saved_bsp#)
        LD8OFF(r2, r14, 8) ;;
        STOREGLOBAL(r2, caml_saved_rnat#)
        add     sp = 48, sp

        /* Update allocation pointer */
        STOREGLOBAL(r4, caml_young_ptr#)

        /* Restore all callee-save registers from stack */
        add     r2 = 16, sp ;;
	LD8OFF(r3, r2, 8) ;;    /* 0 : ar.pfs */
        mov     ar.pfs = r3
        LD8OFF(r3, r2, 8) ;;    /* 1 : return address */
        mov     b0 = r3
        LD8OFF(gp, r2, 8) ;;    /* 2 : gp */
        LD8OFF(r3, r2, 8) ;;    /* 3 : predicates */
        mov     pr = r3, -1
        LD8OFF(r3, r2, 8) ;;    /* 4 : ar.fpsr */
        mov     ar.fpsr = r3
        LD8OFF(r3, r2, 8) ;;    /* 5 : ar.unat */
        mov     ar.unat = r3
        LD8OFF(r3, r2, 8) ;;    /* 6 : ar.lc */
        mov     ar.lc = r3
        LD8OFF(r3, r2, 8) ;;    /* 7 - 11 : b1 - b5 */
        mov     b1 = r3
        LD8OFF(r3, r2, 8) ;;
        mov     b2 = r3
        LD8OFF(r3, r2, 8) ;;
        mov     b3 = r3
        LD8OFF(r3, r2, 8) ;;
        mov     b4 = r3
        LD8OFF(r3, r2, 8) ;;
        mov     b5 = r3

        add     r3 = 8, r2 ;;
        LOAD4(r4,r5,r6,r7) ;;   /* 12 - 15 : r4 - r7 */

        add     r3 = 16, r2 ;;  /* 16 - 55 : f2 - f5, f16 - f31 */
        FFILL4(f2,f3,f4,f5) ;;
        FFILL8(f16,f17,f18,f19,f20,f21,f22,f23) ;;
        FFILL8(f24,f25,f26,f27,f28,f29,f30,f31) ;;

        /* Pop stack frame and return */
        add     sp = (56 * 8), sp
        br.ret.sptk.many b0 ;;

        /* The trap handler */
.L101:
        /* Save exception pointer */
        STOREGLOBAL(r6, caml_exception_pointer#)

        /* Encode exception bucket as exception result */
        or      r8 = 2, r8

        /* Return it */
        br.sptk .L104 ;;

        .endp   caml_start_program#

/* Raise an exception from C */

        .global caml_raise_exception#
        .proc   caml_raise_exception#
        .align  16
caml_raise_exception:
        /* Allocate 64 "out" registers (for the Caml code) and no locals */
        /* Since we don't return, don't bother saving the PFS */
        alloc	r2 = ar.pfs, 0, 0, 64, 0

        /* Move exn bucket where Caml expects it */
        mov     r8 = r32 ;;

        /* Perform "context switch" as per the Software Conventions Guide,
           chapter 10 */
        flushrs ;;                      /* flush dirty registers to stack */
        mov     r14 = ar.rsc ;;
        and     r2 = ~0x3, r14;;        /* set rsc.mode = 0 */
        dep     r2 = r0, r2, 16, 4 ;;   /* clear rsc.loadrs */
        mov     ar.rsc = r2 ;;          /* RSE is in enforced lazy mode */
        invala ;;                       /* Invalidate ALAT */
        LOADGLOBAL(r2, caml_saved_bsp#) ;;
        mov     ar.bspstore = r2        /* Restore ar.bspstore */
        LOADGLOBAL(r2, caml_saved_rnat#) ;;
        mov     ar.rnat = r2            /* Restore ar.rnat */
        mov     ar.rsc = r14 ;;         /* Restore original RSE mode */

        /* Reload allocation pointers and exception pointer */
        LOADGLOBAL(r4, caml_young_ptr#)
        LOADGLOBAL(r5, caml_young_limit#)
        LOADGLOBAL(r6, caml_exception_pointer#)

        /* Say that we're back into Caml */
        STOREGLOBAL(r0, caml_last_return_address#)

        /* Raise the exception proper */
        mov     sp = r6
        add     r2 = 8, r6 ;;
        ld8     r6 = [r6]
        ld8     r2 = [r2] ;;
        mov     b6 = r2 ;;

	/* Branch to handler.  Must use a call so as to set up the
	   CFM and PFS correctly. */
        br.call.sptk.many b0 = b6

        .endp   caml_raise_exception

/* Callbacks from C to Caml */

        .global caml_callback_exn#
        .proc   caml_callback_exn#
        .align  16
caml_callback_exn:
        /* Initial shuffling of arguments */
        ld8     r3 = [r32]              /* code pointer */
        mov     r2 = r32
        mov     r32 = r33 ;;            /* first arg */
        mov     r33 = r2                /* environment */
        mov     b6 = r3
        br.sptk .L103 ;;

        .endp   caml_callback_exn#

        .global caml_callback2_exn#
        .proc   caml_callback2_exn#
        .align  16
caml_callback2_exn:
        /* Initial shuffling of arguments */
        ADDRGLOBAL(r3, caml_apply2)    /* code pointer */
        mov     r2 = r32
        mov     r32 = r33               /* first arg */
        mov     r33 = r34 ;;            /* second arg */
        mov     r34 = r2                /* environment */
        mov     b6 = r3
        br.sptk .L103 ;;

        .endp   caml_callback2_exn#

        .global caml_callback3_exn#
        .proc   caml_callback3_exn#
        .align  16
caml_callback3_exn:
        /* Initial shuffling of arguments */
        ADDRGLOBAL(r3, caml_apply3)    /* code pointer */
        mov     r2 = r32
        mov     r32 = r33               /* first arg */
        mov     r33 = r34               /* second arg */
        mov     r34 = r35 ;;            /* third arg */
        mov     r35 = r2                /* environment */
        mov     b6 = r3
        br.sptk .L103 ;;

        .endp   caml_callback3_exn#

/* Glue code to call [caml_array_bound_error] */

        .global caml_ml_array_bound_error#
        .proc   caml_ml_array_bound_error#
        .align  16
caml_ml_array_bound_error:
        ADDRGLOBAL(r2, @fptr(caml_array_bound_error#))
        br.sptk caml_c_call             /* never returns */        

        .rodata

        .global caml_system__frametable#
        .type   caml_system__frametable#, @object
        .size   caml_system__frametable#, 8
caml_system__frametable:
        data8   1               /* one descriptor */
        data8   .L102           /* return address into callback */
        data2   -1              /* negative frame size => use callback link */
        data2   0               /* no roots here */
        .align  8

/* Global variables used by caml_raise_exception */

        .common caml_saved_bsp#, 8, 8
        .common caml_saved_rnat#, 8, 8
